{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Practice: download images from web\n",
    "\n",
    "**Download amazing pictures from [national geographic](http://www.nationalgeographic.com.cn/animals/)**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "import requests\n",
    "\n",
    "URL = \"http://www.nationalgeographic.com.cn/animals/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## find list of image holder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "html = requests.get(URL).text\n",
    "soup = BeautifulSoup(html, 'lxml')\n",
    "img_ul = soup.find_all('ul', {\"class\": \"img_list\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Create a folder for these pictures**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.makedirs('./img/', exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## download\n",
    "\n",
    "**Find all picture urls and download them.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171227102206573.jpg\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171226032741976.jpg\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171220114008366.jpg\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171219102251229.jpg\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171215043409613.jpg\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved 20171214020322682.jpg\n"
     ]
    }
   ],
   "source": [
    "for ul in img_ul:\n",
    "    imgs = ul.find_all('img')\n",
    "    for img in imgs:\n",
    "        url = img['src']\n",
    "        r = requests.get(url, stream=True)\n",
    "        image_name = url.split('/')[-1]\n",
    "        with open('./img/%s' % image_name, 'wb') as f:\n",
    "            for chunk in r.iter_content(chunk_size=128):\n",
    "                f.write(chunk)\n",
    "        print('Saved %s' % image_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![image](/img/20171214020322682.jpg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
